library(tidyverse)
library(janitor)
library(here)
library(readr)
# Load the three source datasets; the paths are relative to the current
# working directory.
salaries <- read_csv(file = "data/salaries.csv")
# beer.txt is semicolon-delimited and its first row is read as column names.
beer <- read_delim(file = "data/beer.txt", delim = ";", col_names = TRUE)
inmates <- read_tsv(file = "data/inmates.tsv")
# Keep only the two salary columns.
salaries %>%
  select(beginning_salary, current_salary)

# Employees older than 50 whose current salary exceeds 20000.
# Comma-separated conditions in filter() are combined with AND.
salaries %>%
  filter(age > 50, current_salary > 20000)

# Order rows from the highest current salary to the lowest.
salaries %>%
  arrange(desc(current_salary))
# Add the beginning salary multiplied by 100 (pence, assuming the source
# column is in pounds). The new column is derived from beginning_salary so
# that its contents match its name; it previously used current_salary.
salaries %>%
  mutate(beginning_salary_pence = beginning_salary * 100)
# Smallest and largest age in the data.
salaries %>%
  summarise(
    minimum_age = min(age),
    maximum_age = max(age)
  )

# Smallest and largest current salary in the data.
salaries %>%
  summarise(
    minimum_salary = min(current_salary),
    maximum_salary = max(current_salary)
  )

# Employees younger than 30.
salaries %>%
  filter(age < 30)

# Order rows by educational level, ascending.
salaries %>%
  arrange(educational_level)
# Drop every numeric column, keeping only the non-numeric ones.
# select() + where() replaces the superseded select_if(); `!` takes the
# complement of the numeric selection.
salaries %>%
  select(!where(is.numeric))
# Employees with under 2 units of work experience OR an educational level
# below 12.
salaries %>%
  filter(work_experience < 2 | educational_level < 12)

# Current salary expressed in thousands.
salaries %>%
  mutate(current_salary_k = current_salary / 1000)

# Female security officers; both conditions must hold.
salaries %>%
  filter(
    gender == "female",
    employment_category == "security officer"
  )
# Mean current salary for each employment category.
# `.groups = "drop"` makes the ungrouping explicit, which also silences the
# informational message summarise() otherwise prints.
salaries %>%
  group_by(employment_category) %>%
  summarise(
    average_salary = mean(current_salary),
    .groups = "drop"
  )

# Mean current salary of male employees for each employment category.
salaries %>%
  filter(gender == "male") %>%
  group_by(employment_category) %>%
  summarise(
    average_salary_male = mean(current_salary),
    .groups = "drop"
  )

# Head-count for each is_white / gender combination, ordered by descending
# is_white. Without `.groups = "drop"` the result would stay grouped by
# is_white and leak that grouping into any later step.
salaries %>%
  group_by(is_white, gender) %>%
  summarise(
    count_of_employees = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(is_white))
# Try mutate(df, mean_age = mean(current_salary)). You may need to use View()
# to see the whole data. Now use group_by() with mutate() and mean().
# What do you see?
# Ungrouped: every row receives the same overall mean of current_salary.
salaries %>%
  mutate(mean_salary = mean(current_salary))

# Grouped by age: each row receives the mean current_salary of its own age
# group. The result is intentionally left grouped, as in the original.
salaries %>%
  group_by(age) %>%
  mutate(mean_salary = mean(current_salary))
# Mean current salary per educational level (levels up to and including 16),
# highest average first. `.groups = "drop"` states the ungrouping explicitly
# and silences the informational message from summarise().
salaries %>%
  filter(educational_level <= 16) %>%
  group_by(educational_level) %>%
  summarise(
    average_salary = mean(current_salary),
    .groups = "drop"
  ) %>%
  arrange(desc(average_salary))

# Mean current salary per employment category and gender, restricted to
# rows with work_experience of at least 2. Dropping the groups prevents the
# result from staying grouped by employment_category.
salaries %>%
  filter(work_experience >= 2) %>%
  group_by(employment_category, gender) %>%
  summarise(
    average_salary = mean(current_salary),
    .groups = "drop"
  )

# Gap between mean and median current salary per employment category,
# largest gap first.
salaries %>%
  group_by(employment_category) %>%
  summarise(
    mean_salary = mean(current_salary),
    median_salary = median(current_salary),
    .groups = "drop"
  ) %>%
  mutate(difference = mean_salary - median_salary) %>%
  arrange(desc(difference))
# Rows where carbohydrates is missing.
beer %>%
  filter(is.na(carbohydrates))

# Rows where brand is missing.
beer %>%
  filter(is.na(brand))
# Abbreviate gender: "Male" becomes "M" and "Female" becomes "F".
inmates %>%
  mutate(
    gender = recode(gender, "Male" = "M", "Female" = "F")
  )

# Title-case race, then fold "Amer Ind" and "Asian" into "Other".
# Both steps are done in a single mutate(); the case conversion runs first
# because it is the inner call.
inmates %>%
  mutate(
    race = recode(
      str_to_title(race),
      "Amer Ind" = "Other",
      "Asian" = "Other"
    )
  )
# Label each row "High" when bond_amount is above 1000000, else "Normal".
inmates %>%
  mutate(
    bond_level = if_else(bond_amount > 1000000, "High", "Normal")
  )

# Number of rows with a "High" bond. Filtering on the threshold directly
# selects the same rows as labelling them first and then keeping "High";
# rows with a missing bond_amount are excluded either way.
inmates %>%
  filter(bond_amount > 1000000) %>%
  summarise(high_bond_count = n())
# Title-case detainer, keep the three listed values unchanged, and map
# everything else (including missing values) to "Other".
inmates %>%
  mutate(
    detainer = str_to_title(detainer),
    detainer = case_when(
      detainer %in% c("None", "Imigration", "Federal") ~ detainer,
      TRUE ~ "Other"
    )
  )